Batch Summary

Expand Code

# Load Libraries
start_time_all <- Sys.time()

options(warn = 1)  # output warnings as they appear for traceability in stdout/stderr record

knitr::opts_chunk$set(echo = TRUE, warning = FALSE) # warnings will go to console

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}

quiet_library(batchreporter)
quiet_library(Matrix)        # dependency of H5weaver
quiet_library(rhdf5)         # dependency of H5weaver
quiet_library(H5weaver)      # aifi package
quiet_library(HTOparser)     # aifi package
quiet_library(ggplot2)       
quiet_library(dplyr)         # data wrangling
quiet_library(cowplot)       # arranging multiple plots
quiet_library(gt)            # formatted table output
quiet_library(plotly)        # interactive plots
quiet_library(tidyr)         # data wrangling  
quiet_library(jsonlite)      # reading and writing json metadata files
quiet_library(purrr)         
quiet_library(future)        # multi-threading for batch umap creation
quiet_library(future.apply)  # multi-threading for batch umap creation
quiet_library(Seurat)

stm("Starting NGS Batch Report")

stm(paste(c("\t",paste(names(Sys.info()),Sys.info(),sep = ": ")), collapse = "\n\t"))  

Argument Parsing

# give input directory rna-specific name 
if(is.null(params$in_dir)) {
  batch_id <- "X070"
  in_dir <- system.file("extdata/X070", package = "batchreporter")
  in_key <- system.file("extdata/example_sample_key_X070.csv", package = "batchreporter")
  in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
  in_batch_meta <- system.file("extdata/batch-metadata.json", package = "batchreporter")
  in_method_string <- "scrna;scatac;adt;hto"
  out_dir <- tempdir()
} else {
  batch_id <-  params$batch_id  
  in_method_string <- params$in_method
  in_dir <- params$in_dir  
  in_key <- params$in_key  
  in_config <- params$in_config  
  if(is.null(in_config)){
      in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
      stm("No config file provided. Using default_rna_config_v1.csv for pbmc data")
  }
  out_dir <- params$out_dir
  in_batch_meta <- params$in_batch_meta
  if(is.null(in_batch_meta)){in_batch_meta <- NA}
}

stm(paste0("IN Batch        : ", batch_id))
stm(paste0("IN Method       : ", in_method_string))
stm(paste0("IN Directory    : ", in_dir))
stm(paste0("IN Sample Key   : ", in_key))
stm(paste0("IN Config       : ", in_config))
stm(paste0("IN Batch Processing Info : ", in_batch_meta))
stm(paste0("OUT Dir         : ", out_dir))

print(paste0("IN Batch        : ", batch_id))
## [1] "IN Batch        : X000"
print(paste0("IN Method       : ", in_method_string))
## [1] "IN Method       : scrna;scatac;adt"
print(paste0("IN Directory    : ", in_dir))
## [1] "IN Directory    : ~/Packages/batchreporter/inst/extdata/X070"
print(paste0("IN Sample Key   : ", in_key))
## [1] "IN Sample Key   : ~/Packages/batchreporter/inst/extdata/example_sample_key_X070_dummy-nonhashed.csv"
print(paste0("IN Config       : ", in_config))
## [1] "IN Config       : ~/Packages/batchreporter/inst/extdata/default_rna_config_v1.csv"
print(paste0("IN Batch Processing Info : ", in_batch_meta))
## [1] "IN Batch Processing Info : NA"
print(paste0("OUT Dir         : ", out_dir))
## [1] "OUT Dir         : ~/Projects/X070/test_reports"

Check input files

if(!dir.exists(in_dir)) {
  stm(paste("ERROR: Cannot find IN results dir:", in_dir))
  stop()
}
if(!file.exists(in_key)) {
  stm(paste("ERROR: Cannot find IN sample key:", in_key))
  stop()
}
if(!file.exists(in_config)) {
  stm(paste("ERROR: Cannot find IN config file:", in_config))
  stop()
}
if(!is.na(in_batch_meta) && !file.exists(in_batch_meta)) {
  stm(paste("ERROR: Cannot find IN batch meta:", in_batch_meta))
  stop()
}
if(!dir.exists(out_dir)) {
  stm(paste("Creating output directory:", out_dir))
  dir.create(out_dir)
}

out_prefix <- file.path(out_dir, paste0(batch_id, "_"))

Read in the sample key

stm("Reading in sample key")
df_key <- data.table::fread(in_key)
has_controls <- any(grepl("Control", df_key$Type))  # used to control evaluation of batch control-related code chunks

assertthat::assert_that(length(unique(df_key$BatchID)) == 1, msg = "More than 1 batch in input sample key file")
## [1] TRUE
assertthat::assert_that(batch_id == unique(df_key$BatchID), msg = "Batch in input key file does not match input batch value")
## [1] TRUE

Determine which modalities streams were run

defined_modalities <- c("scrna", "scatac", "adt", "hto")

# convert method string to vector
in_method <- strsplit(in_method_string, split = ";")[[1]]
in_method <- tolower(in_method)

# Logic check input methods
if(!all(in_method %in% defined_modalities)){
    unknowns <- setdiff(in_method, defined_modalities)
    stop(sprintf("One or more input methods are not in defined modalities: '%s'. Defined modalities are: [%s]. Input methods should be passed as a ';'-delimited string, ie 'scrna;scatac;hto'.",
                    paste(unknowns, collapse = "', '"),
                    paste(defined_modalities, collapse = ', ')))
} 

has_rna <- "scrna" %in% in_method
has_atac <- "scatac" %in% in_method
has_adt <- "adt" %in% in_method
has_hto <- "hto" %in% in_method

Define and check input folder expectations

if(has_rna){
  in_rna <- file.path(in_dir, "scrna")
  if(!dir.exists(in_rna)){
    stop(sprintf("Expected RNA input directory [%s] does not exist.", in_rna))
  }
}

if(has_atac){
  in_atac <- file.path(in_dir, "scatac")
  if(!dir.exists(in_atac)){
    stop(sprintf("Expected ATAC input directory [%s] does not exist.", in_atac))
  }
}

if(has_adt){
  in_adt <- file.path(in_dir, "adt")
  if(!dir.exists(in_adt)){
    stop(sprintf("Expected ADT input directory [%s] does not exist.", in_adt))
  }
}

if(has_hto){
  in_hto <- file.path(in_dir, "hto")
  if(!dir.exists(in_hto)){
    stop(sprintf("Expected HTO input directory [%s] does not exist.", in_hto))
  }
}

Batch Information

stm("Constructing Batch Information table")

# Summarize batch information, also declare some global batch variables that are used throughout the report
pools <- unique(df_key$PoolID)
n_pools <- length(pools)
if(n_pools==1 & grepl("P0$", pools)){
  n_pools_label <- 0
} else {
  n_pools_label <- n_pools
}

wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
n_wells <- length(wells)

samples <- unique(df_key$SampleID)
n_samples <- length(samples)

controls <- unique(df_key$SampleID[df_key$Type == "Control"])
control_string <- ifelse(has_controls, paste(controls, collapse = ", "), "None")

if (has_controls){
  n_study_samples <- setdiff(samples, controls)
} else {
  n_study_samples <- samples
}

n_study_samples <- length(n_study_samples)

samples_pool <- tapply(df_key$SampleID, df_key$PoolID, unique)
samples_pool_string <- sapply(samples_pool, function(x){paste(x, collapse = ", ")})

labels <- c("Batch","Libraries","N Samples", "N Pools","N Wells","Batch Control", paste0(pools, " Samples"))
values <-  c(batch_id, in_method_string, n_study_samples, n_pools_label, n_wells, control_string, samples_pool_string)

simple_html_table(labels, values, fontsize = 3, col_widths_px = c(175, 850))
Batch X000
Libraries scrna;scatac;adt
N Samples 3
N Pools 1
N Wells 3
Batch Control None
X000-P1 Samples Sample01, Sample02, Sample03

Batch Processing

if(!is.na(in_batch_meta)){
  if(!file.exists(in_batch_meta)) {stop(sprintf("Supplied in_batch_meta file '%s' does not exist. ",in_batch_meta))}
  meta_table <- jsonlite::read_json(in_batch_meta)
  meta_table <- as.data.frame(meta_table)  # collapses any nested names
  
  if(nrow(meta_table) == 1){
    simple_html_table(colnames(meta_table), unlist(meta_table[1,]), fontsize = 3, col_widths_px = c(200,250))
  } else{
    stop(sprintf("in_batch_meta file should have 1:1 key value pairs. Check supplied file '%s' for proper formatting. ",in_batch_meta))
  }
} else {
  cat("No processing details supplied")
}  

No processing details supplied

scRNA Well

The following metrics summarize the sequencing and alignment by 10x well prior to un-hashing and hash-based cell filtering.

Contents

Data Quality UMAP

Expand Code

Check Dependencies

Reading in well info from h5 files

if(!exists("well_info")){
  # Merge Well Data
  stm("Reading in labeled h5 file well data")
  well_list <- lapply(all_h5, read_h5_well_meta)
  
  # Find common columns
  cols_keep <- Reduce(intersect, lapply(well_list, colnames))
  well_list <- lapply(well_list, function(x){x[,cols_keep]})
  
  well_info <- unique(do.call(rbind, well_list))
  setDT(well_info)
  well_info[, pool_id := gsub("C\\d+W\\d+", "", well_id)]
  
  remove("well_list")
}

Reading in metadata from h5 files

stm("Reading and merging all rna meta data")
rna_meta_list <- lapply(all_h5, H5weaver::read_h5_cell_meta)

# make sure column names are the same
col_list <- lapply(rna_meta_list, colnames)
all_cols_identical <- length(unique(col_list)) == 1
if(!all_cols_identical){
  all_columns <- unique(unlist(lapply(rna_meta_list, colnames)))
  common_columns <- Reduce(union, lapply(rna_meta_list, colnames))
  if(!all(all_columns %in% common_columns)){
    stm(sprintf("Warning: rna h5 files do not contain the same meta data columns. Keeping only the common columns. Removing columns %s.", 
        paste(setdiff(all_columns, common_columns), sep = ", ")))
  }
  rna_meta_list <- lapply(rna_meta_list, function(x){x[, common_columns]})
}

# merge metadata
rna_meta <- do.call(rbind, rna_meta_list)

# add metadata variables
rna_meta$fct_mito_umi <- rna_meta$n_mito_umi/rna_meta$n_umis
fct_mito_grp_cutoffs <- c(-Inf, 0.05, 0.10, 0.20, 0.30,Inf)
fct_mito_grp_labels <- c("0-0.05","0.05-0.10","0.10-0.20","0.20-0.30",">0.30")
rna_meta$fct_mito_group <- cut(rna_meta$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)

Read in Counts from H5 Files

stm("Reading and merging all rna count matrices")

rna_count_list <- lapply(all_h5, H5weaver::read_h5_dgCMatrix, target = "matrix", 
                         feature_names = "id")
# make sure all matrices have same number of rows
if(!length(unique(sapply(rna_count_list, nrow)))==1){
  stop("RNA count matrixes have different numbers of rows")
} 

# make sure rows are in same order
row_order <- rownames(rna_count_list[[1]])
rna_count_list <- lapply(rna_count_list, function(x){x[row_order,]})

# make sure columns are in same order as metadata
order_check <- mapply(function(x, y){(all(x$barcodes==colnames(y)))}, rna_meta_list, rna_count_list)
if(!all(order_check)){
  # Reorder matrix columns to be consistent with metadata
  rna_count_list <- mapply( function(x, y){x[,y$barcodes]}, rna_count_list, rna_meta_list)
}

# merge
rna_counts <- do.call(cbind, rna_count_list)

featDF <- read_h5_feature_meta(all_h5[1])

Sample cells from each well to generate umaps

n_cells_sample <- 2000

stm(sprintf("Sampling %s cells per well (or all cells if fewer)", n_cells_sample))

set.seed(3)
sample_index_list <- lapply(rna_meta_list, function(x){
  sample_size = min(n_cells_sample, nrow(x))
  sort(sample(1:nrow(x), size = sample_size, replace = FALSE))
})

n_files <- length(sample_index_list)
rna_meta_list_sampled <- lapply(1:n_files, function(x){
 rna_meta_list[[x]][sample_index_list[[x]],]
})
rna_count_list_sampled <- lapply(1:n_files, function(x){
 rna_count_list[[x]][,sample_index_list[[x]]]
})

rna_meta_sampled <- do.call(rbind, rna_meta_list_sampled)
rna_meta_sampled$fct_mito_umi <- rna_meta_sampled$n_mito_umi/rna_meta_sampled$n_umis
rna_meta_sampled$fct_mito_group <- cut(rna_meta_sampled$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)
rna_counts_sampled <- do.call(cbind, rna_count_list_sampled)

rm(list=c("rna_count_list", "rna_count_list_sampled","rna_meta_list"))
vnames_rna <- c("estimated_number_of_cells", "fraction_reads_in_cells", 
            "mean_reads_per_cell", "median_genes_per_cell", "median_umi_counts_per_cell", 
            "number_of_reads", "q30_bases_in_barcode", "q30_bases_in_rna_read", 
            "q30_bases_in_sample_index", "q30_bases_in_umi", "reads_mapped_antisense_to_gene", 
            "reads_mapped_confidently_to_exonic_regions", "reads_mapped_confidently_to_genome", 
            "reads_mapped_confidently_to_intergenic_regions","reads_mapped_confidently_to_intronic_regions", 
            "reads_mapped_confidently_to_transcriptome", "reads_mapped_to_genome", 
            "sequencing_saturation", "total_genes_detected", "valid_barcodes")

vnames_arc <- c("estimated_number_of_cells",
  "gex_fraction_of_transcriptomic_reads_in_cells","gex_mean_raw_reads_per_cell","gex_median_genes_per_cell",
  "gex_median_umi_counts_per_cell","gex_percent_duplicates","gex_q30_bases_in_barcode",
  "gex_q30_bases_in_read_2","gex_q30_bases_in_sample_index_i1","gex_q30_bases_in_sample_index_i2",
  "gex_q30_bases_in_umi","gex_reads_mapped_antisense_to_gene","gex_reads_mapped_confidently_to_exonic_regions",
  "gex_reads_mapped_confidently_to_genome","gex_reads_mapped_confidently_to_intergenic_regions", "gex_reads_mapped_confidently_to_intronic_regions",
  "gex_reads_mapped_confidently_to_transcriptome","gex_reads_mapped_to_genome","gex_reads_with_tso",
  "gex_sequenced_read_pairs","gex_total_genes_detected","gex_valid_barcodes", "gex_valid_umis")  

if(all(vnames_rna %in% colnames(well_info))){
  vnames <- vnames_rna
  vlabels <- c("Estimated Number of Cells", "Fraction Reads in Cells", 
            "Mean Reads per Cell", "Median Genes per Cell", "Median UMI per Cell", 
            "Number of Reads", "Q30 Bases in Barcode (%)", "Q30 Bases in RNA Read (%)", 
            "Q30 Bases in Sample Index (%)", "Q30 Bases in UMI (%)", "Reads Mapped Antisense to Gene (%)", 
            "Reads Mapped Confidently to Exonic Regions (%)", "Reads Mapped Confidently to Genome (%)", 
            "Reads Mapped Confidently to Intergenic Regions (%)",
            "Reads Mapped Confidently to Intronic Regions (%)", 
            "Reads Mapped Confidently to Transcriptome (%)", "Reads Mapped to Genome (%)", 
            "Sequencing Saturation (%)", "Total Genes Detected", "Valid Barcodes (%)")
  n_vars <- length(vnames)
  vartypes <- c(rep("Cells", 5), rep("Sequencing", 5), rep("Mapping", 7),"Sequencing","Cells","Sequencing")
  vartypes <- factor(vartypes,  levels = c("Cells", "Sequencing", "Mapping"))

  digitsRound <- c(0, 1, rep(0, 4), rep(1, 12), 0, 1)
  rna_data_type <- "rna"
} else if (all(vnames_arc %in% colnames(well_info))){
  vnames <- vnames_arc
  vlabels <- c("Estimated Number of Cells","Fraction Transcriptomic Reads in Cells",
            "Mean Raw Reads per Cell", "Median Genes per Cell", "Median UMI per Cell", "Percent Duplicates",
            "Q30 Bases in Barcode", "Q30 Bases in RNA Read 2", "Q30 Bases in Sample Index i1",
            "Q30 Bases in Sample Index i2", "Q30 Bases in UMI", "Reads Mapped Antisense to Gene", 
            "Reads Mapped Confidently to Exonic Regions", "Reads Mapped Confidently to Genome", 
            "Reads Mapped Confidently to Intergenic Regions", 
            "Reads Mapped Confidently to Intronic Regions", 
            "Reads Mapped Confidently to Transcriptome", "Reads Mapped to Genome", "Reads with TSO",
            "Sequenced Read Pairs", "Total Genes Detected", "Valid Barcodes", "Valid UMIs") 
  n_vars <- length(vnames)
  vartypes <- c(rep("Cells", 5), rep("Sequencing", 6), rep("Mapping", 7),"Sequencing","Sequencing","Cells","Sequencing","Sequencing")
  digitsRound <- c(0, 3, 0, 0, 0, rep(3, 14), 0, 0, 3, 3)
  

    rna_data_type <- "arc"
} else {
    vnames <- setdiff( colnames(well_info), c("well_id","pool_id"))
    vlabels <- gsub("_"," ", vnames)
    vartypes <- rep(NA, length(vnames))
    
    getDigits <- function(vNumeric){
      vChar <- as.character(vNumeric)
      is_decimal <- any(grepl("[.]", vChar))
      if(is_decimal){
        decimal_digits <- nchar(vChar) - (nchar(gsub("[.].*","", vChar))+1)
        n_digits <- max(decimal_digits)
      } else {
        n_digits <- 0
      }
      n_digits
    }
    digitsRound <- sapply(subset(well_info, select = vnames), getDigits)
    
    rna_data_type <- "unknown"
}

  df_vars <- data.frame(Category = vartypes,
                        Variable_name = vlabels,
                        Variable = vnames,
                        Round = digitsRound)

Pool Summary

Summary by pool or by entire non-hashed batch

stm("Creating scrna well cellranger summary table")

if(rna_data_type == "rna"){
  pool_info <- well_info %>%
  dplyr::group_by(pool_id) %>%
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
            total_reads = formatC(sum(number_of_reads), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else if (rna_data_type == "arc"){
  pool_info <- well_info %>%
  dplyr::group_by(pool_id) %>%
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
            total_sequenced_read_pairs = formatC(sum(gex_sequenced_read_pairs), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else {
  print("Warning: No total reads column detected in well metadata")
  pool_info <- well_info %>%  
  dplyr::group_by(pool_id) %>%  
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"), 
                   total_reads = NA, .groups = "drop")
}

names(pool_info) <- stringr::str_to_title(gsub("_", " ", names(pool_info)))
pool_info %>%
  gt::gt() %>%
  gt::cols_align(align = "right", columns = 2:3)

Pool Id Total Cells Total Sequenced Read Pairs
X070-P1 53,132 2,081,986,539
Return to Contents

Detailed Well Summary

stm("Creating detailied scrna well cellranger table")

unique_pools <- sort(unique(well_info$pool_id))  

well_summary_table <- well_info %>% 
  gather(key = Variable, value = Value, all_of(vnames)) %>%   # all variables long
  full_join(df_vars, by = "Variable") %>%  
  group_by(pool_id, Category, Variable, Variable_name) %>% 
  summarize(Median = formatC(median(Value, na.rm=T), big.mark = ",", digits = unique(Round), format = "f"),
            Range = get_range(Value, digits = unique(Round), verbose = F), 
            `CV%` = round(sd(Value)/mean(Value)*100, 1),
            .groups = "drop") %>% 
  arrange(Category, Variable_name) %>% 
  tidyr::pivot_wider(id_cols = c("Category", "Variable", "Variable_name"), 
                     names_from = pool_id, 
                     values_from = c("Median", "Range", "CV%"),
                     names_glue = "{pool_id}__{.value}",
                     names_sort = TRUE) %>%
  mutate(Plot = sprintf("[Plot](#%s)", Variable)) %>% 
  select(Category, Variable_name, contains(unique_pools), Plot) # reorder cols by pools first then stats, keep only the clean var name

gt_table <- well_summary_table %>% 
  gt::gt() %>% 
  gt::fmt_markdown(columns = "Plot") %>% # convert the plot column text to markdown to activate links. these links generated with plots below.
  gt::cols_width(vars(Category) ~ px(100),
                 vars(Variable_name) ~ px(150),
                 ends_with("Median") ~ px(100),
                 ends_with("Range") ~ px(100),
                 ends_with("CV%") ~ px(50),
                 vars(Plot) ~ px(60)) %>% 
  gt::tab_options(table.font.size = 11, column_labels.font.size = 12) %>%
  gt::tab_spanner_delim(delim = "__") %>% 
  gt::cols_align(align = "right",
                 columns = c(ends_with("Median"), ends_with("Range"))) %>% 
  gt::cols_align(align = "center",
                 columns = vars(Plot)) %>% 
  gt::cols_label(Variable_name = "Variable")

gt_table
Category Variable X070-P1 Plot
Median Range CV%
Cells Estimated Number of Cells 17,743 17,190-18,199 2.9
Cells Fraction Transcriptomic Reads in Cells 0.885 0.880-0.886 0.4
Cells Mean Raw Reads per Cell 39,754 35,614-42,379 8.7
Cells Median Genes per Cell 1,992 1,962-2,050 2.2
Cells Median UMI per Cell 4,130 4,048-4,297 3.1
Cells Total Genes Detected 30,628 30,599-30,756 0.3
Mapping Reads Mapped Antisense to Gene 0.166 0.165-0.171 1.7
Mapping Reads Mapped Confidently to Exonic Regions 0.392 0.384-0.393 1.2
Mapping Reads Mapped Confidently to Genome 0.905 0.904-0.905 0.1
Mapping Reads Mapped Confidently to Intergenic Regions 0.071 0.071-0.072 0.3
Mapping Reads Mapped Confidently to Intronic Regions 0.442 0.441-0.449 1.0
Mapping Reads Mapped Confidently to Transcriptome 0.659 0.655-0.661 0.5
Mapping Reads Mapped to Genome 0.968 0.968-0.970 0.1
Sequencing Percent Duplicates 0.775 0.758-0.781 1.5
Sequencing Q30 Bases in Barcode 0.958 0.956-0.959 0.2
Sequencing Q30 Bases in RNA Read 2 0.927 0.922-0.930 0.4
Sequencing Q30 Bases in Sample Index i1 0.974 0.940-0.974 2.0
Sequencing Q30 Bases in Sample Index i2 0.955 0.942-0.958 0.9
Sequencing Q30 Bases in UMI 0.959 0.957-0.959 0.1
Sequencing Reads with TSO 0.119 0.116-0.130 6.1
Sequencing Sequenced Read Pairs 705,357,243 648,135,670-728,493,626 6.0
Sequencing Valid Barcodes 0.940 0.939-0.941 0.1
Sequencing Valid UMIs 0.999 0.999-0.999 0.0

Expand table of statistics per well

qc_table(well_info)

Return to Contents

Plots of Well-Level Metrics

stm("Generating sequencing and alignment QC plots")

verpal <- hcl.colors(n = n_vars, palette = "viridis")

# Plots
for (i in seq_along(vnames)){
  df <- data.table::copy(well_info)
  spec <- vnames[i]
  slabel <- vlabels[i]
  df <- as.data.frame(df)
  df$spec_col <- df[,spec]
  med_val <- median(df$spec_col)
  cv <- round(sd(df$spec_col)/mean(df$spec_col)*100, 2)
  n <- sum(!is.na(df$spec_col))
  
  g <- ggplot(df, aes(well_id, spec_col)) +
    geom_bar(stat = "identity", fill = verpal[i]) + 
    geom_hline(yintercept = med_val, linetype = "dashed", color = "red")+
    scale_y_continuous(sec.axis = dup_axis(breaks = med_val, labels = med_val, name = ""))+
    xlab("Well") +
    ylab(slabel) +
    facet_wrap(~pool_id, ncol = n_pools, scales = "free_x", drop = TRUE) +
    ggtitle(slabel, 
            subtitle = sprintf("Median=%s    CV=%.1f%%    N=%s", med_val, cv, n)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  
  # Plot-specific hyperlink definition
  cat(sprintf('\n<a id="%s"></a>', spec), labels = "", sep = "\n")
  
  # Output plot
  suppressWarnings(print(g))
  
  # Link back to top of section
  cat("  \n[Return to Contents](#rna_seq_well_top)", labels = "", sep = "\n")
  
}


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents

Count Stats per Well

Read Counts by Well

# Reads per hto cat
stm("Generating read count violin plots")

# Reads per hto plot
g_read <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_reads",
                        name_y = "N Reads per Cell",
                        log_y = TRUE,
                        fill = "dodgerblue") +
  ggtitle("Reads per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_read, subchunk_name = "well_read_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

UMI Counts by Well

# Reads per hto cat
stm("Generating umi count violin plots")

# UMI per hto plot
g_umi <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_umis",
                        name_y = "N UMIs per Cell",
                        log_y = TRUE,
                        fill = "purple") +
  ggtitle("UMIs per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_umi, subchunk_name = "well_umi_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Gene Counts by Well

# Reads per hto cat
stm("Generating gene count violin plots")

# Reads per hto plot
g_genes <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_genes",
                        name_y = "N Genes per Cell",
                        log_y = TRUE,
                        fill = "orangered") +
  ggtitle("Genes per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_genes, subchunk_name = "well_gene_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Mitochondrial UMIs

Fraction Mitochondrial UMI

g_bar <- ggplot(rna_meta, aes(well_id,  fill = factor(fct_mito_group, 
                                                      levels = rev(fct_mito_grp_labels)))) +
  geom_bar() +
  labs(fill = "Fraction Mitochondrial UMIs") +
  ylab("N Cells") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

g_pl <- plotly::ggplotly(g_bar) %>%
  layout(hovermode = 'compare')

tempwidth <- n_samples*0.4 + 2

make_subchunk(g_pl, subchunk_name = "fraction_mito_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = tempwidth, fig.height = 6, echo = FALSE))

Return to Contents

Fraction Mito UMIs by UMI Counts

stm("Generating mt umi vs umi count scatter plots")

# Reads per hto plot
g_mito_umi <- ggplot(rna_meta, aes(n_umis, fct_mito_umi)) +
    geom_point(color = "purple", alpha = 0.2) +
    xlab("UMI per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs UMI per Cell") 
g_mito_umi

Expand Per-Well Plot

stm("Generating mt umi vs umi count scatter plots by well")

n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)

# Reads per hto plot
g_mito_umi_well <- g_mito_umi +
    facet_wrap(~well_id, ncol = ncols_plot)

temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_umi_well, subchunk_name = "fraction_mito_umis_well_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Return to Contents

Fraction Mito UMIs by Gene Counts

stm("Generating mt umi vs gene count scatter plots")

# Reads per hto plot
g_mito_gene <- ggplot(rna_meta, aes(n_genes, fct_mito_umi)) +
    geom_point(color = "orangered", alpha = 0.2) +
    xlab("Genes per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs UMI per Cell") 
g_mito_gene

Expand Per-Well Plot

stm("Generating mt umi vs gene count scatter plots by well")

n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)

# Reads per hto plot
g_mito_gene_well <- g_mito_gene +
    facet_wrap(~well_id, ncol = ncols_plot)

temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_gene_well, subchunk_name = "fraction_mito_genes_well_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Return to Contents

Data Quality UMAP

Expand code

# Create Seurat object
stm("Creating Seurat object from merged data for scRNA UMAP")
merged_so <- Seurat::CreateSeuratObject(counts = rna_counts_sampled)

# Normalize data
stm("Normalizing data for scRNA UMAP")
merged_so <- Seurat::NormalizeData(object = merged_so,
                                   normalization.method = "LogNormalize",
                                   scale.factor = 10000,
                                   margin = 1)

normCounts <- merged_so[["RNA"]]@data
pc_dims <- min(ncol(merged_so) - 1, 50)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Finding Variable Features for scRNA UMAP")
merged_so <- FindVariableFeatures(object = merged_so)

stm("Scaling Data for scRNA UMAP")
merged_so <- ScaleData(object = merged_so, verbose = FALSE)

stm("Running PCA for scRNA UMAP")
merged_so <- RunPCA(object = merged_so, npcs = pc_dims, verbose = FALSE)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Determining dimensionality via jackstraw method for scRNA UMAP")

labels_order <- rna_meta_sampled$well_id[match(colnames(merged_so),rna_meta_sampled$barcodes)]
names(labels_order) <- colnames(merged_so)

jackstraw_cells <- sample_cells(labels_order, 500, seed = 3030)

jackstraw_so <- merged_so[, jackstraw_cells]

jackstraw_so <- JackStraw(object = jackstraw_so,
                         dims = pc_dims,
                         num.replicate = 50,  
                         verbose = FALSE)
jackstraw_so <- ScoreJackStraw(object = jackstraw_so,
                              dims = 1:pc_dims)

pc_pvals <- jackstraw_so@reductions$pca@jackstraw$overall.p.values[,2]
good_pcs <- pc_pvals < 0.05

nPC <- sum(good_pcs)

pc_var <- Stdev(merged_so, reduction = "pca")^2
total_var <- merged_so@reductions$pca@misc$total.variance
var_selected_pc <- sum(pc_var[good_pcs])/total_var
cumvar_string <- sprintf(fmt = "%.1f", var_selected_pc*100)

stm(sprintf("Selected %s significant pcs via JackStraw, %s%% explained variation for scRNA UMAP", nPC, cumvar_string))

pc_embeddings <- merged_so@reductions$pca@cell.embeddings

# stm(sprintf("Using %s principal components for umap",nPC))
# Run UMAP
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm(sprintf("Running UMAP on selected coordinates for scRNA UMAP"))

merged_so <- Seurat::RunUMAP(merged_so,
                                   dims = c(1:50)[good_pcs],
                                   umap.method = "uwot",
                                   seed.use = 3,
                                   verbose = FALSE)
# Get UMAP coordinates
umapDF <- merged_so[["umap"]]@cell.embeddings %>%
          as.data.frame() %>%
          dplyr::rename(UMAP_1_merged = UMAP_1, UMAP_2_merged = UMAP_2) %>%
          tibble::rownames_to_column(var = "barcodes")
umapDF <- merge(umapDF, rna_meta_sampled, by = "barcodes")
rownames(umapDF) <- umapDF$barcodes

umapDF <- umapDF[rownames(merged_so@"meta.data"), , drop = F]

normCounts <- merged_so[["RNA"]]@data
if(!all(rownames(umapDF) == colnames(normCounts))){
  stop("Merged UMAP dataframe does not match columns in normalized counts")
}

Batch-level UMAP of 2000 randomly sampled cells per well using 34 principal components (21.3% explained variance) selected by jackstraw.

stm("Plotting Batch scRNA UMAP by Data Quality Metrics")

# Gene scaling
max_genes <- max(8000, rna_meta$n_genes)
max_umi <- max(60000, rna_meta$n_umis)
point_size <- 0.2


# Cell Types
g_base <- ggplot(umapDF, aes(UMAP_1_merged, UMAP_2_merged))

# fraction mitochondrial umi
stm("Plotting Fraction Mito UMAP")
g1 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Fraction Mitochondrial UMIs",
                 point_size = point_size,
                 color_col = "fct_mito_umi",
                 scale_color_fun = scale_color_fct_mito)

# N Genes
stm("Plotting N Genes UMAP")
g2 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N Genes",
                 point_size = point_size,
                 color_col = "n_genes",
                 scale_color_fun = scale_color_genes(max_genes)
)

# N UMIs
stm("Plotting N UMIs UMAP")
g3 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N UMIs",
                 point_size = point_size,
                 color_col = "n_umis",
                 scale_color_fun = scale_color_umis(max_umi)
)


# Wells
stm("Plotting Well UMAP")
cols_well <- H5weaver::varibow(n_colors = length(unique(umapDF$well_id)))

scale_color_well <- function(...){
    scale_color_manual(values = cols_well, ...)
}
g4 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Well ID",
                 point_size = point_size,
                 color_col = "well_id",
                 scale_color_fun = scale_color_well
) +
  guides(colour = guide_legend(override.aes = list(size = 3)))


aligned_plots <- cowplot::align_plots(g1, g2, g3, g4, align = "hv", axis = "tblr")  # uniform plot sizing

cowplot::plot_grid(aligned_plots[[1]],
                   aligned_plots[[2]],
                   aligned_plots[[3]],
                   aligned_plots[[4]],
                   ncol = 2)

Return to Contents


scRNA seq report well module v.1.0.0, Lauren Okada

ADT Well

Session Preparation

Expand output

Load Libraries
quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(ggplot2)
quiet_library(Seurat)
quiet_library(viridis)
quiet_library(dplyr)
quiet_library(plyr)
quiet_library(data.table)
Locate input files
stm("Identifying files for ADT analysis")

# Input directory in_adt should be defined in the parent Rmarkdown report.
if(is.null(in_adt)) {
  warning("No input directory for adt batch report, defaulting to test data")
  in_adt <- system.file("extdata/X070/adt", package = "batchreporter")
} 

adt_count_files <- list.files(in_adt,
                              pattern = "Tag_Counts.csv")

well_names <- gsub('.{15}$', '', adt_count_files)

adt_count_filepaths <- list.files(in_adt, pattern = "Tag_Counts.csv", full.names = T)
adt_pos_count_filepaths <- list.files(in_adt, pattern = "positive_tag_counts.csv", full.names = T)
adt_pos_count_files <- basename(adt_pos_count_filepaths)

cat(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")), sep = "\n")
## IN ADT Tag Count files: 
##  X070-EP1C1W1_Tag_Counts.csv
##  X070-EP1C1W2_Tag_Counts.csv
##  X070-EP1C1W3_Tag_Counts.csv
stm(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")))  

cat(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")), sep = "\n")
## IN POSITIVE ADT Tag Count files: 
##  X070-EP1C1W1_adt_positive_tag_counts.csv
##  X070-EP1C1W2_adt_positive_tag_counts.csv
##  X070-EP1C1W3_adt_positive_tag_counts.csv
stm(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")))
Load Inputs

Expand output

Create pos & neg droplet matrices, then create seurat objects

Merge Seurat Objects

# all_merge <- Reduce(merge,seurat_list)

merge_seurat <- function(so_list){
  if (length(so_list) > 2){
    temp <- merge(x = so_list[[1]], y = so_list[[2]])
    i = 3
    while (i < length(so_list) + 1){
      temp <- merge(x = temp, y = so_list[[i]])
      i = i + 1
    }
    return(temp)
  }
  else{
    temp <- merge(x = so_list[[1]], y = so_list[[2]])
  }
  return(temp)
}

all_merge <- merge_seurat(seurat_list)

Split Violin Plot

Return to Contents

UMAP clustering for ADTs

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 53132
## Number of edges: 1375642
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9449
## Number of communities: 45
## Elapsed time: 6 seconds

UMAP clustering ADTs by Well
DimPlot(adt_positives, group.by = 'well')

Return to Contents

n_features <- length(rownames(adt_positives))
height = ceiling((n_features / 3) * 2)

UMAP clustering for ADTs individually

Return to Contents

ADT UMIs Dataframe construction - start here

i = 1
colsums_list <- list()
while (i < length(count_list) + 1){
  temp_adt_so <- seurat_list[[i]]
  temp_adt_so <- SetIdent(temp_adt_so, value = 'orig.ident')
  temp_adt_positives <- subset(temp_adt_so, idents = 'Cells')
  temp_adt_pos_mtx <- t(data.frame(temp_adt_positives@assays$RNA@counts))
  colsums_df <- data.frame(colSums(temp_adt_pos_mtx))
  colnames(colsums_df) <- 'Count'
  colsums_df$ADT <- rownames(colsums_df)
  colsums_df$Well <- rep(well_names[[i]], length(rownames(colsums_df)))
  colsums_list[[i]] <- colsums_df
  i = i + 1
}

combined_colsums <- bind_rows(colsums_list, .id = "column_label")

ADT UMIs plot

ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) + 
  geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + scale_y_log10() 

ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) + 
  geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))  

Return to Contents


ADT report module v.1.0.0, Zach Thomson

scATAC Well

Data Processing

Session Preparation
Load libraries:
start_time_atac <- Sys.time()

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(data.table)
quiet_library(H5weaver)
quiet_library(ggplot2)
quiet_library(cowplot)
quiet_library(jsonlite)
quiet_library(purrr)
options(stringsAsFactors = FALSE)

Declaring start

stm("Starting scATAC Batch Report module")
Argument parsing
if(is.null(in_atac)) {
  in_atac <- system.file("testdata/batch_qc", package = "ATAComb")
  batch_id <- "X055"
  out_dir <- tempdir()
} 

stm(paste0("IN  results dir      : ", in_atac))
stm(paste0("IN  BatchID          : ", batch_id))
stm(paste0("OUT Directory        : ", out_dir))
Input Parameters
print(c(
  paste0("IN  results dir      : ", in_atac),
  paste0("IN  BatchID          : ", batch_id),
  paste0("OUT H5 directory     : ", out_dir)
))
## [1] "IN  results dir      : ~/Packages/batchreporter/inst/extdata/X070/scatac"
## [2] "IN  BatchID          : X000"                                             
## [3] "OUT H5 directory     : ~/Projects/X070/test_reports"
Check Input Files
if(!dir.exists(in_atac)) {
  stm(paste("ERROR: Cannot find IN results dir:", in_atac))
  stop()
}
if(!dir.exists(out_dir)) {
  stm(paste("Creating output directory:", out_dir))
  dir.create(out_dir)
}
out_prefix <- file.path(out_dir, paste0(batch_id, "_"))
Check available files

Unfiltered metadata

meta_files <- list.files(in_atac, 
                         pattern = "_all_metadata.csv.gz$",
                         full.names = TRUE)
if(length(meta_files) == 0) {
  stop("Can't find unfiltered metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Full Metadata Files:")
for(meta_file in meta_files) {
  stm(meta_file)
  print(meta_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_all_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_all_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_all_metadata.csv.gz"
meta_list <- map(meta_files, fread)
sample_names <- sub(".+/","",sub("_all_metadata.csv.gz","",meta_files))
names(meta_list) <- sample_names

Filtered metadata

filt_meta_files <- list.files(in_atac, 
                         pattern = "_filtered_metadata.csv.gz$",
                         full.names = TRUE)

if(length(filt_meta_files) < length(meta_files)) {
  stop("Can't find filtered metadata files. Check input directory for *_filtered_metadata.csv.gz files.")
} else if(length(filt_meta_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Filtered Metadata Files:")
for(filt_meta_file in filt_meta_files) {
  stm(filt_meta_file)
  print(filt_meta_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_filtered_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_filtered_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_filtered_metadata.csv.gz"
filt_meta_list <- map(filt_meta_files, fread)
names(filt_meta_list) <- sub(".+/","",sub("_filtered_metadata.csv.gz","",filt_meta_files))
filt_meta_list <- filt_meta_list[sample_names]

Saturation projections

sat_files <- list.files(in_atac,
                        pattern = "_saturation_projection.csv.gz$",
                        full.names = TRUE)

if(length(sat_files) < length(meta_files)) {
  stop("Can't find all saturation files. Check input directory for *_saturation_projection.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Saturation Projection Files:")
for(sat_file in sat_files) {
  stm(sat_file)
  print(sat_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_saturation_projection.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_saturation_projection.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_saturation_projection.csv.gz"
names(sat_files) <- sub(".+/","",sub("_saturation_projection.csv.gz","",sat_files))
sat_files <- sat_files[sample_names]

sat_list <- map(sat_files, fread)

Fragment widths

width_files <- list.files(in_atac,
                          pattern = "_fragment_width_summary.csv.gz",
                          full.names = TRUE)

if(length(width_files) < length(meta_files)) {
  stop("Can't find all fragment width files. Check input directory for *_fragment_width_summary.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Fragment Width Summary Files:")
for(width_file in width_files) {
  stm(width_file)
  print(width_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
names(width_files) <- sub(".+/","",sub("_fragment_width_summary.csv.gz","",width_files))
width_files <- width_files[sample_names]

width_list <- map(width_files, fread)

Return to Contents

Combine metadata
filtered_meta <- do.call(rbind, filt_meta_list)
meta <- do.call(rbind, meta_list)

cutoffs <- list(altius_frac = 0.5,
                tss_frac = 0.2,
                peaks_frac = 0.2)

meta$pass_fail <- "pass"
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  cut_logic <- meta[[cut_name]] <= cut_val
  meta$pass_fail[cut_logic] <- "fail"
}

meta$filtered <- meta$barcodes %in% filtered_meta$barcodes
meta$mito_frac <- meta$n_mito / meta$n_fragments
meta$well_label <- paste0(meta$well_id, "\n", meta$pbmc_sample_id)

well_samples <- unique(meta[,c("well_id","pbmc_sample_id","well_label")])
Filter metadata based on cutoffs
stm("Filtering based on QC cutoffs")

meta <- meta %>%
  dplyr::left_join(dplyr::select(filtered_meta, barcodes, DoubletScore, DoubletEnrichment, TSSEnrichment), by = "barcodes")

filtered_meta <- meta
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  filtered_meta <- filtered_meta[filtered_meta[[cut_name]] > cut_val]
  filtered_meta <- filtered_meta[filtered_meta$filtered,]
}

filtered_meta$well_label <- paste0(filtered_meta$well_id, "\n", filtered_meta$pbmc_sample_id)

well_filtered_meta_list <- split(filtered_meta, filtered_meta$well_id)

Set up global metadata for reporting

meta$barcode_category <- "fail_qc"
meta$barcode_category[!meta$filtered & meta$pass_fail == "pass"] <- "pass_doublet"
meta$barcode_category[meta$filtered & meta$pass_fail == "pass"] <- "pass_singlet"

Return to Contents

QC Stats

qc_list <- list(report_type = "atac_batch_qc",
                report_datetime = as.character(start_time_atac),
                report_uuid = ids::uuid(use_time = TRUE),
                package = "ATAComb",
                package_version = sessionInfo()$otherPkgs$ATAComb$Version,
                batch_id = sub("_.+","",sample_names[1]))

out_json <- paste0(out_prefix, "atac_batch_qc_metrics.json")

Return to Contents

Barcode QC Stats
barcode_counts <- meta[,.(n_barcodes = nrow(.SD),
                            n_pass_qc = sum(.SD$pass_fail == "pass"),
                            n_fail_qc = sum(.SD$pass_fail == "fail"),
                            percent_fail = round(sum(.SD$pass_fail == "fail")/nrow(.SD)*100,2),
                            pass_singlets = sum(.SD$barcode_category == "pass_singlet"),
                            pass_doublets = sum(.SD$barcode_category == "pass_doublet"),
                            percent_doublets = round(sum(.SD$barcode_category == "pass_doublet")/sum(.SD$pass_fail == "pass")*100,2)),
                         .(well_id,pbmc_sample_id)]

qc_list$barcode_stats <- as.list(barcode_counts)

qc_table(barcode_counts)
qc_stacked_barplot(meta,
                   category_x = "well_label",
                   name_x = "Well ID",
                   category_y = "barcode_category",
                   category_name = "Barcode Category",
                   as_fraction = TRUE)

qc_aligned_barplot(meta,
                   category_x = "well_label",
                   name_x = "Well ID",
                   category_y = "barcode_category",
                   category_name = "Barcode Category")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "DoubletEnrichment",
                                     name_y = "Doublet Enrichment",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "DoubletScore",
                                     name_y = "Doublet Score",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "TSSEnrichment",
                                     name_y = "TSS Enrichment",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

Return to Contents

Fragment QC stats

Plot Settings

n_grid_columns <- min(length(well_filtered_meta_list),4)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/4)

grid_width <- n_grid_columns * 3
grid_height <- n_grid_rows * 3
fragment_stats <- filtered_meta[,.(n_singlets = nrow(.SD[.SD$filtered]),
                                   med_raw_frag = round(median(n_fragments),0),
                                   med_raw_perc_mito = round(median(mito_frac)*100,4),
                                   med_unique_frag = round(median(n_unique),0),
                                   med_unique_fritss = round(median(tss_frac),4),
                                   med_unique_frip = round(median(peaks_frac),4),
                                   med_unique_encode = round(median(altius_frac),4)
                                   ),
                                .(well_id,pbmc_sample_id)]

qc_list$fragment_stats <- as.list(fragment_stats)

qc_table(fragment_stats)

Return to Contents

Unique Fragments per Cell
category_reads_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "n_unique",
                                         name_y = "Unique Fragments",
                                         fill = "dodgerblue")
well_reads_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "n_unique",
                                     name_y = "Unique Fragments (Singlets)",
                                     fill = "dodgerblue")

reads_violin_list <- list(category_reads_violins, 
                          well_reads_violins)

plot_grid(plotlist = reads_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Return to Contents

Fraction of Raw Reads in Mitochondria per Cell
category_mito_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "mito_frac",
                                         name_y = "Fraction Mitochondrial",
                                         fill = "darkgreen",
                                        log_y = FALSE)
well_mito_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "mito_frac",
                                     name_y = "Fraction Mito. (Singlets)",
                                     fill = "darkgreen",
                                    log_y = FALSE)

mito_violin_list <- list(category_mito_violins, 
                          well_mito_violins)

plot_grid(plotlist = mito_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Return to Contents

Fraction of Reads in Peaks per Cell
category_frip_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "peaks_frac",
                                         name_y = "FRIP",
                                         fill = "orangered",
                                        log_y = FALSE)
well_frip_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "peaks_frac",
                                     name_y = "FRIP (Singlets)",
                                     fill = "orangered",
                                    log_y = FALSE)

frip_violin_list <- list(category_frip_violins, 
                          well_frip_violins)

plot_grid(plotlist = frip_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs peaks_frac scatter
qc_scatter_list <- map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "peaks_frac",
                                         name_y = "Frac Fragments in Peaks (peaks_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "orangered") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$peaks_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in TSS (+/- 2kb) per Cell
category_fritss_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "tss_frac",
                                         name_y = "FRITSS",
                                         fill = "mediumorchid3",
                                        log_y = FALSE)
well_fritss_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "tss_frac",
                                     name_y = "FRITSS (Singlets)",
                                     fill = "mediumorchid3",
                                    log_y = FALSE)

fritss_violin_list <- list(category_fritss_violins, 
                          well_fritss_violins)

plot_grid(plotlist = fritss_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs tss_frac scatter
qc_scatter_list <- map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "tss_frac",
                                         name_y = "Frac Fragments in TSS (tss_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "mediumorchid3") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$tss_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in ENCODE/Altius Index per Cell
category_enc_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "altius_frac",
                                         name_y = "FRIENCODE",
                                         fill = "darkred",
                                        log_y = FALSE)
well_enc_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "altius_frac",
                                     name_y = "FRIENCODE (Singlets)",
                                     fill = "darkred",
                                    log_y = FALSE)

enc_violin_list <- list(category_enc_violins, 
                          well_enc_violins)

plot_grid(plotlist = enc_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs altius_frac scatter
qc_scatter_list <-map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "altius_frac",
                                         name_y = "Frac Fragments in Altius (altius_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "darkred") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$altius_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Saturation metrics
all_sat <- map_dfr(1:length(sat_list),
                   function(x) {
                     sat <- sat_list[[x]]
                     sat_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(sat_list)[x])
                     sat$pbmc_sample_id <- sat_pbmc_sample
                     sat$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == sat_pbmc_sample]
                     sat$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == sat_pbmc_sample]
                     sat
                   })
Saturation cutoffs
all_sat <- as.data.table(all_sat)
sat_cutoffs <- all_sat[,.(M_raw = max(.SD$M_raw_reads[.SD$type == "actual"]),
                          M_umis = max(.SD$M_umis[.SD$type == "actual"]),
                          M_signal = max(.SD$M_signal_umis[.SD$type == "actual"]),
                          M_raw_2x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 2))[1]],
                          M_raw_4x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 4))[1]],
                          M_raw_2x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 2))[1]],
                          M_raw_4x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 4))[1]],
                          M_raw_8x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 8))[1]]),
                       .(well_id,pbmc_sample_id)]

qc_list$saturation_stats <- as.list(sat_cutoffs)

qc_table(sat_cutoffs)
Saturation of Unique Fragments
reference_projections <- fread(system.file("reference/saturation/reference_projections.csv",
                                           package = "ATAComb"))
reference_projections$dataset <- paste0("ref_",reference_projections$dataset)

umi_saturation_plot <- ggplot() +
  geom_line(data = reference_projections,
            aes(x = n_raw_reads,
                y = expected_umis,
                group = dataset,
                color = dataset)) +
  geom_line(data = all_sat,
            aes(x = n_raw_reads,
                y = expected_umis,
                group = well_label,
                color = type),
            size = 2)  +
  scale_color_brewer("Data Type",
                     type = "qual",
                     palette = 2) +
  scale_x_continuous("N Raw Reads (millions)",
                     expand = c(0,0),
                     limits = c(0, 1.5e9),
                     breaks = seq(0, 1.5e9, by = 2.5e8),
                     labels = seq(0, 1500, by = 250)) +
  scale_y_continuous("N Unique Fragments (millions)", 
                     expand = c(0,0),
                     limits = c(0, 5e8),
                     breaks = seq(0, 5e8, by = 1e8),
                     labels = seq(0, 500, by = 100)) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  facet_wrap(facets = vars(well_label), ncol = 4) +
  ggtitle("Saturation of All Unique Fragments")

umi_saturation_plot

Saturation of Signal Fragments

Signal fragments are reads in Altius Index regions in barcodes passing QC

signal_saturation_plot <- ggplot() +
  geom_line(data = reference_projections,
            aes(x = n_raw_reads,
                y = signal_umis,
                group = dataset,
                color = dataset)) +
  geom_line(data = all_sat,
            aes(x = n_raw_reads,
                y = signal_umis,
                group = well_label,
                color = type),
            size = 2)  +
  scale_color_brewer("Data Type",
                     type = "qual",
                     palette = 2) +
  scale_x_continuous("N Raw Reads (millions)",
                     expand = c(0,0),
                     limits = c(0, 1.5e9),
                     breaks = seq(0, 1.5e9, by = 2.5e8),
                     labels = seq(0, 1500, by = 250)) +
  scale_y_continuous("N Signal Fragments (millions)", 
                     expand = c(0,0),
                     limits = c(0, 2e8),
                     breaks = seq(0, 2e8, by = 2e7),
                     labels = seq(0, 200, by = 20)) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  facet_wrap(facets = vars(well_label), ncol = 4) +
  ggtitle("Saturation of Signal Fragments")

signal_saturation_plot

Return to Contents

Plot Settings

n_grid_columns <- min(length(well_filtered_meta_list),2)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/2)

grid_width <- n_grid_columns * 4
grid_height <- n_grid_rows * 2
Fragment Width Distributions
frag_widths <- map_dfr(1:length(width_list),
                   function(x) {
                     fw <- width_list[[x]]
                     fw_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(width_list)[x])
                     fw$pbmc_sample_id <- fw_pbmc_sample
                     fw$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == fw_pbmc_sample]
                     fw$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == fw_pbmc_sample]
                     fw
                   })

frag_widths <- frag_widths[width <= 750]

ggplot(data = frag_widths) +
  geom_line(aes(x = width,
                y = frac,
                group = pass_fail,
                color = pass_fail),
            size = 1) +
  xlim(0, 750) +
  facet_wrap(vars(well_label), ncol = 2) +
  theme_bw()

Return to Contents

Write QC JSON

stm(paste0("Writing JSON to ",out_json))

qc_list_json <- jsonlite::toJSON(qc_list,
                                 auto_unbox = TRUE,
                                 pretty = TRUE)

writeLines(qc_list_json,
           out_json)

Return to Contents

Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          viridis_0.5.1       viridisLite_0.3.0  
##  [4] Seurat_3.9.9.9010   future.apply_1.6.0  future_1.19.1      
##  [7] purrr_0.3.4         jsonlite_1.7.1      tidyr_1.1.2        
## [10] plotly_4.9.3        gt_0.2.2            cowplot_1.1.0      
## [13] dplyr_1.0.4         ggplot2_3.3.2       HTOparser_1.0.12   
## [16] H5weaver_1.2.0      data.table_1.13.2   rhdf5_2.32.4       
## [19] Matrix_1.2-18       batchreporter_1.1.0 optparse_1.6.6     
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     deldir_0.1-29       
##   [4] ellipsis_0.3.1       ggridges_0.5.2       spatstat.data_2.1-0 
##   [7] farver_2.0.3         leiden_0.3.3         listenv_0.8.0       
##  [10] DT_0.16              getopt_1.20.3        ggrepel_0.8.2       
##  [13] RSpectra_0.16-0      R.methodsS3_1.8.1    codetools_0.2-16    
##  [16] splines_4.0.3        knitr_1.30           polyclip_1.10-0     
##  [19] ica_1.0-2            cluster_2.1.0        R.oo_1.24.0         
##  [22] png_0.1-7            uwot_0.1.9.9000      shiny_1.5.0         
##  [25] sctransform_0.3.1    compiler_4.0.3       httr_1.4.2          
##  [28] backports_1.1.10     assertthat_0.2.1     fastmap_1.0.1       
##  [31] lazyeval_0.2.2       later_1.1.0.1        htmltools_0.5.1.1   
##  [34] tools_4.0.3          rsvd_1.0.3           igraph_1.2.6        
##  [37] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [40] reshape2_1.4.4       Rcpp_1.0.5           spatstat_1.64-1     
##  [43] vctrs_0.3.6          nlme_3.1-149         crosstalk_1.1.0.1   
##  [46] lmtest_0.9-38        xfun_0.18            stringr_1.4.0       
##  [49] globals_0.13.1       mime_0.9             miniUI_0.1.1.1      
##  [52] lifecycle_0.2.0      irlba_2.3.3          goftest_1.2-2       
##  [55] ids_1.0.1            MASS_7.3-53          zoo_1.8-8           
##  [58] scales_1.1.1         promises_1.1.1       spatstat.utils_2.1-0
##  [61] parallel_4.0.3       RColorBrewer_1.1-2   yaml_2.2.1          
##  [64] reticulate_1.16      pbapply_1.4-3        gridExtra_2.3       
##  [67] sass_0.3.1           rpart_4.1-15         stringi_1.5.3       
##  [70] checkmate_2.0.0      commonmark_1.7       rlang_0.4.10        
##  [73] pkgconfig_2.0.3      matrixStats_0.57.0   evaluate_0.14       
##  [76] lattice_0.20-41      ROCR_1.0-11          tensor_1.5          
##  [79] Rhdf5lib_1.10.1      labeling_0.4.2       patchwork_1.0.1     
##  [82] htmlwidgets_1.5.3    tidyselect_1.1.0     RcppAnnoy_0.0.16    
##  [85] magrittr_1.5         R6_2.4.1             generics_0.0.2      
##  [88] DBI_1.1.1            mgcv_1.8-33          pillar_1.4.6        
##  [91] withr_2.3.0          fitdistrplus_1.1-1   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.0.4         crayon_1.3.4        
##  [97] uuid_0.1-4           KernSmooth_2.23-17   rmarkdown_2.4       
## [100] grid_4.0.3           digest_0.6.26        xtable_1.8-4        
## [103] httpuv_1.5.4         R.utils_2.10.1       munsell_0.5.0

Total time elapsed

end_time <- Sys.time()
diff_time <- end_time - start_time_atac
time_message <- paste0("Elapsed Time: ", 
                       round(diff_time, 3),
                       " ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 15.957 secs"
stm(time_message)
stm("10x ATAC Batch QC Report complete.")

Return to Contents


scATAC report module 1.0.0, Lucas Graybuck

Session Information

Input Directory:

in_dir 
## [1] "~/Packages/batchreporter/inst/extdata/X070"

Input Directory Contents:

folders <- list.dirs(in_dir, recursive = FALSE)

file_list <- lapply(folders, function(x){
  dir(x, recursive = TRUE)
})
names(file_list) <- basename(folders)
file_list
## $adt
## [1] "X070-EP1C1W1_adt_positive_tag_counts.csv"
## [2] "X070-EP1C1W1_Tag_Counts.csv"             
## [3] "X070-EP1C1W2_adt_positive_tag_counts.csv"
## [4] "X070-EP1C1W2_Tag_Counts.csv"             
## [5] "X070-EP1C1W3_adt_positive_tag_counts.csv"
## [6] "X070-EP1C1W3_Tag_Counts.csv"             
## 
## $hto
## [1] "X070-P1C1W1_hto_category_table.csv.gz"  
## [2] "X070-P1C1W1_hto_count_matrix.csv.gz"    
## [3] "X070-P1C1W1_hto_processing_metrics.json"
## [4] "X070-P1C1W2_hto_category_table.csv.gz"  
## [5] "X070-P1C1W2_hto_count_matrix.csv.gz"    
## [6] "X070-P1C1W2_hto_processing_metrics.json"
## [7] "X070-P1C1W3_hto_category_table.csv.gz"  
## [8] "X070-P1C1W3_hto_count_matrix.csv.gz"    
## [9] "X070-P1C1W3_hto_processing_metrics.json"
## 
## $scatac
##  [1] "X070_X070-MP1C1W1_all_metadata.csv.gz"          
##  [2] "X070_X070-MP1C1W1_filtered_metadata.csv.gz"     
##  [3] "X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
##  [4] "X070_X070-MP1C1W1_saturation_projection.csv.gz" 
##  [5] "X070_X070-MP1C1W2_all_metadata.csv.gz"          
##  [6] "X070_X070-MP1C1W2_filtered_metadata.csv.gz"     
##  [7] "X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
##  [8] "X070_X070-MP1C1W2_saturation_projection.csv.gz" 
##  [9] "X070_X070-MP1C1W3_all_metadata.csv.gz"          
## [10] "X070_X070-MP1C1W3_filtered_metadata.csv.gz"     
## [11] "X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
## [12] "X070_X070-MP1C1W3_saturation_projection.csv.gz" 
## 
## $scrna
## [1] "X070-P1C1W1.h5" "X070-P1C1W2.h5" "X070-P1C1W3.h5"

Batch processing metadata:

in_batch_meta 
## [1] NA

Config File:

in_config
## [1] "~/Packages/batchreporter/inst/extdata/default_rna_config_v1.csv"

Key File:

in_key
## [1] "~/Packages/batchreporter/inst/extdata/example_sample_key_X070_dummy-nonhashed.csv"

Output Directory:

out_dir
## [1] "~/Projects/X070/test_reports"

Session Info:

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          viridis_0.5.1       viridisLite_0.3.0  
##  [4] Seurat_3.9.9.9010   future.apply_1.6.0  future_1.19.1      
##  [7] purrr_0.3.4         jsonlite_1.7.1      tidyr_1.1.2        
## [10] plotly_4.9.3        gt_0.2.2            cowplot_1.1.0      
## [13] dplyr_1.0.4         ggplot2_3.3.2       HTOparser_1.0.12   
## [16] H5weaver_1.2.0      data.table_1.13.2   rhdf5_2.32.4       
## [19] Matrix_1.2-18       batchreporter_1.1.0 optparse_1.6.6     
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     deldir_0.1-29       
##   [4] ellipsis_0.3.1       ggridges_0.5.2       spatstat.data_2.1-0 
##   [7] farver_2.0.3         leiden_0.3.3         listenv_0.8.0       
##  [10] DT_0.16              getopt_1.20.3        ggrepel_0.8.2       
##  [13] RSpectra_0.16-0      R.methodsS3_1.8.1    codetools_0.2-16    
##  [16] splines_4.0.3        knitr_1.30           polyclip_1.10-0     
##  [19] ica_1.0-2            cluster_2.1.0        R.oo_1.24.0         
##  [22] png_0.1-7            uwot_0.1.9.9000      shiny_1.5.0         
##  [25] sctransform_0.3.1    compiler_4.0.3       httr_1.4.2          
##  [28] backports_1.1.10     assertthat_0.2.1     fastmap_1.0.1       
##  [31] lazyeval_0.2.2       later_1.1.0.1        htmltools_0.5.1.1   
##  [34] tools_4.0.3          rsvd_1.0.3           igraph_1.2.6        
##  [37] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [40] reshape2_1.4.4       Rcpp_1.0.5           spatstat_1.64-1     
##  [43] vctrs_0.3.6          nlme_3.1-149         crosstalk_1.1.0.1   
##  [46] lmtest_0.9-38        xfun_0.18            stringr_1.4.0       
##  [49] globals_0.13.1       mime_0.9             miniUI_0.1.1.1      
##  [52] lifecycle_0.2.0      irlba_2.3.3          goftest_1.2-2       
##  [55] ids_1.0.1            MASS_7.3-53          zoo_1.8-8           
##  [58] scales_1.1.1         promises_1.1.1       spatstat.utils_2.1-0
##  [61] parallel_4.0.3       RColorBrewer_1.1-2   yaml_2.2.1          
##  [64] reticulate_1.16      pbapply_1.4-3        gridExtra_2.3       
##  [67] sass_0.3.1           rpart_4.1-15         stringi_1.5.3       
##  [70] checkmate_2.0.0      commonmark_1.7       rlang_0.4.10        
##  [73] pkgconfig_2.0.3      matrixStats_0.57.0   evaluate_0.14       
##  [76] lattice_0.20-41      ROCR_1.0-11          tensor_1.5          
##  [79] Rhdf5lib_1.10.1      labeling_0.4.2       patchwork_1.0.1     
##  [82] htmlwidgets_1.5.3    tidyselect_1.1.0     RcppAnnoy_0.0.16    
##  [85] magrittr_1.5         R6_2.4.1             generics_0.0.2      
##  [88] DBI_1.1.1            mgcv_1.8-33          pillar_1.4.6        
##  [91] withr_2.3.0          fitdistrplus_1.1-1   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.0.4         crayon_1.3.4        
##  [97] uuid_0.1-4           KernSmooth_2.23-17   rmarkdown_2.4       
## [100] grid_4.0.3           digest_0.6.26        xtable_1.8-4        
## [103] httpuv_1.5.4         R.utils_2.10.1       munsell_0.5.0

Total time elapsed

end_time_all <- Sys.time()
diff_time_all <- end_time_all - start_time_all
time_message_all <- paste0("Elapsed Time: ", 
                       round(diff_time_all, 3),
                       " ", units(diff_time_all))
print(time_message_all)
## [1] "Elapsed Time: 5.28 mins"
stm(time_message_all)
stm("Batch report process complete.")

Return to Top


NGS report v.1.0.0, Lauren Okada